/**
 * 
 */
package com.attilax.dataspider;

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.attilax.core;
import com.attilax.concur.TaskUtil;
import com.attilax.curr.PoolUtil;
import com.attilax.exception.ExUtil;
import com.attilax.fileTrans.ConnEx;
import com.attilax.img.imgx;
import com.attilax.io.filex;
import com.attilax.io.pathx;
import com.attilax.lang.text.strUtil;
import com.attilax.lbs.NoRztEx;
import com.attilax.net.HttpUtil;
import com.attilax.net.websitex;
import com.attilax.util.urlUtil;
import com.attilax.web.UrlX;
import com.google.common.collect.Lists;

/**
 * @author ASIMO
 *
 */
public class TsaolyoNetDataSpider extends PicSpider {

	// Not read or written by any method visible in this class.
	public int pages;
	// Not read or written by any method visible in this class.
	public int pageStart;
	// Root directory under which downPic() stores pictures, one sub-directory
	// per article title.
	public String picSaveDir;
	// First list-page number to crawl (inclusive); see getpageUrls().
	private int startPage;
	// Last list-page number to crawl (inclusive); see getpageUrls().
	private int endPage;
	// Shared progress counters (submitted/finished articles and pictures),
	// updated from the pool threads and printed by showRpt().
	TaskExeReport rpt = new TaskExeReport();

	/**
	 * Command-line entry point: configures one spider and runs a full crawl.
	 *
	 * <p>The page range comes from the system properties {@code startPage}
	 * (default 1) and {@code endPage} (default 10), so it can be changed with
	 * {@code -DstartPage=..} / {@code -DendPage=..} on the command line.
	 *
	 * @author attilax
	 * @param args ignored
	 * @throws NumberFormatException if either property is not an integer
	 * @since p17 d_b_0
	 */
	public static void main(String[] args) {
		// Only install the default when the caller did not pass -DendPage;
		// overwriting it unconditionally made the property impossible to use.
		if (System.getProperty("endPage") == null) {
			System.setProperty("endPage", "10");
		}

		TsaolyoNetDataSpider x = new TsaolyoNetDataSpider();
		x.picSaveDir = "c:\\0picSaveDir";
		x.startPage = Integer.parseInt(System.getProperty("startPage", "1"));
		x.endPage = Integer.parseInt(System.getProperty("endPage"));
		x.perPicDelay = 1000;

		x.exec();

		System.out.println("--fi0055667788");
	}

	/**
	 * Manual smoke test that exercises the parsing and download helpers
	 * against hard-coded local HTML snapshots and sample URLs. Not called
	 * from anywhere in this class (the call in main is commented out).
	 *
	 * @param x spider instance whose helpers are exercised
	 */
	private static void t2(TsaolyoNetDataSpider x) {
		// 1. article links extracted from a saved list page
		String listPageHtml = filex.read("c:\\new 36.html", "utf8");
		List articleLinks = x.getArtListByPagehtml(listPageHtml);
		System.out.println(articleLinks);

		// 2. relative picture path resolved against an article URL
		String articleUrl = "http://view.news.qq.com/original/intouchtoday/n3660.html";
		String resolvedPic = x.getAbsUrlPic(articleUrl,
				"./art.net_files/22088668b119b1691940c03f61ef6ea5a08094fc.jpg");
		System.out.println(resolvedPic);

		// 3. picture URLs extracted from a saved article page
		String articleHtml = filex.read("c:\\art.net.html", "gbk");
		List pictureUrls = x.getPics_byHtml(articleHtml,
				"http://cl.cmcher.com/htm_data/16/1609/2082995.html");
		System.out.println(pictureUrls);

		// 4. a single real download
		x.downPic(
				"http://img03.cweb-pix.com/images/2016/02/18/220886248d19f404ee8f601798bea7e4edb4377d.jpg",
				"tt");
	}

	// Never assigned in this class; the only assignment (in exec) is commented out.
	filex fx;
	// Only referenced from commented-out code in this class.
	public String fileName;
	// Only referenced from commented-out code in this class.
	public String kw;
	// Pool that runs the picture downloads; created in exec().
	private ExecutorService picPool;
	// Idle flag for the picture pool: every picture submission/run resets it
	// to 0, and exec() sets it to 1 after each poll. Reading 1 at the next
	// poll therefore means no picture activity happened in between.
	public AtomicInteger picPoolfinish=new AtomicInteger(0);
	/**
	 * Runs the whole crawl.
	 *
	 * <ol>
	 * <li>Waits 15 seconds, then creates the article pool (40 threads) and
	 * the picture pool (300 threads).</li>
	 * <li>Feeds every list page from {@link #getpageUrls()} to
	 * {@code exec_singlePage}; a failing page is logged and skipped.</li>
	 * <li>Queues {@code PoolUtil.endTask} on the article pool (presumably it
	 * shuts that pool down once reached - confirm in PoolUtil).</li>
	 * <li>Polls every 150 seconds until the picture pool has been idle for a
	 * whole polling interval, the article pool reports shutdown, and the
	 * number of finished pictures is within a small slack of the number
	 * submitted.</li>
	 * <li>Shuts the picture pool down so its worker threads can exit.</li>
	 * </ol>
	 *
	 * @return always {@code null}
	 */
	public String exec() {
		try {
			Thread.sleep(15000);
		} catch (InterruptedException e1) {
			// The start-up delay is optional; log and carry on with the crawl.
			e1.printStackTrace();
		}
		System.out.println("--start now");
		artPool = Executors.newFixedThreadPool(40);
		picPool = Executors.newFixedThreadPool(300);

		List<String> pageUrls = getpageUrls();
		int n = 1;
		for (String listUrl : pageUrls) {
			this.nowPageIdx = n;
			try {
				exec_singlePage(listUrl);
			} catch (Exception e) {
				// Best effort: one broken list page must not stop the crawl.
				e.printStackTrace();
			}
			n++;
		}

		artPool.execute(PoolUtil.endTask(artPool, "artPool"));

		while (true) {
			TaskUtil.sleep_sec(150);

			// Slack between "finished" and "submitted" that is still treated
			// as complete; it grows with the size of the crawl. The ranges
			// are contiguous, so exactly 100 or 1000 submissions no longer
			// fall through to the largest slack.
			long submitted = rpt.sumbit_pic_count.get();
			int ajd;
			if (submitted < 100) {
				ajd = 2;
			} else if (submitted < 1000) {
				ajd = 5;
			} else if (submitted < 100000) {
				ajd = 10;
			} else {
				ajd = 50;
			}

			boolean picturesIdle = this.picPoolfinish.get() == 1;
			if (picturesIdle
					&& artPool.isShutdown()
					&& rpt.nowIndex_alreadyCount.get() + ajd >= rpt.sumbit_pic_count.get()) {
				break;
			}

			// Arm the idle flag; any picture activity before the next poll
			// resets it to 0.
			this.picPoolfinish.set(1);
		}

		// Let downloads that are already queued finish, but release the worker
		// threads afterwards; otherwise the idle non-daemon threads keep the
		// JVM alive after the crawl is done.
		picPool.shutdown();

		return null;
	}

	// 1-based index of the list page currently being processed; set by exec().
	int nowPageIdx;
	// Not read or written by any method visible in this class.
	int nowArtIdx;
	// Not read or written by any method visible in this class.
	int nowpicIdx;
	// Pool that runs one processArt() task per article; created in exec().
	ExecutorService artPool;

	/**
	 * Processes one list page: downloads it, extracts the article links and
	 * queues one article task per link on {@code artPool}.
	 *
	 * <p>A download failure is handed to {@code ExUtil.throwExV3} together
	 * with the page URL (presumably it rethrows - confirm in ExUtil). Each
	 * queued task logs its own failure, then bumps the finished-article
	 * counter and prints the progress report.
	 *
	 * @author attilax
	 * @param pageUrl URL of the list page
	 * @since p17 g_37_c
	 */
	private void exec_singlePage(String pageUrl) {
		String html = null;
		try {
			websitex fetcher = new websitex();
			fetcher.refer = "http://www.czvv.com/";
			html = fetcher.WebpageContent(pageUrl, "gbk", 60);
		} catch (Exception e) {
			ExUtil.throwExV3(e, "-- url:" + pageUrl);
		}

		List<String> articleUrls = getArtListByPagehtml(html);
		for (String artUrl : articleUrls) {
			System.out.println("--now start process url is :" + artUrl);

			Runnable articleTask = () -> {
				try {
					processArt(artUrl);
				} catch (Exception e) {
					// A failing article is logged and otherwise ignored.
					new RuntimeException("--url:" + artUrl, e).printStackTrace();
				} finally {
					rpt.nowIndex_article.incrementAndGet();
					showRpt();
				}
			};

			try {
				artPool.execute(articleTask);
				rpt.sumbit_arts_count.incrementAndGet();
			} catch (Exception e) {
				new RuntimeException("--url:" + artUrl, e).printStackTrace();
			}
		}
	}

	/**
	 * Empty placeholder; does nothing. Its only call site in this class
	 * (at the end of exec_singlePage) is commented out.
	 *
	 * attilax, 2016-09-29 23:48:22
	 */
	private void submit_endTask_for_artpool() {
		// Intentionally empty (never implemented).

	}

	/**
	 * Prints one progress line to standard output: finished/submitted
	 * pictures followed by finished/submitted articles, all read from
	 * {@code rpt}.
	 */
	private void showRpt() {
		StringBuilder line = new StringBuilder("----***pic/picSubmit: ");
		line.append(rpt.nowIndex_alreadyCount.get());
		line.append("/");
		line.append(rpt.sumbit_pic_count.get());
		line.append(",,now art/artSubmit:");
		line.append(rpt.nowIndex_article.get());
		line.append("/");
		line.append(rpt.sumbit_arts_count.get());
		System.out.println(line.toString());
	}

	/**
	 * Processes one article: downloads its HTML, takes the {@code "title"}
	 * entry from {@code ArticleService.process}, collects the picture URLs
	 * and queues one download task per picture on {@code picPool}, pausing
	 * {@code perPicDelay} milliseconds before each submission.
	 *
	 * <p>Each download task logs its own failure, then bumps the
	 * finished-picture counter and prints the progress report. A submission
	 * that fails is logged and the next picture is tried. If the calling
	 * thread is interrupted while pausing, the interrupt flag is restored and
	 * no further pictures of this article are submitted.
	 *
	 * attilax, 2016-09-27 16:08:27
	 *
	 * @param artUrl URL of the article page
	 */
	private void processArt(String artUrl) {
		String html = getArtHtml(artUrl);
		Map<?, ?> article = new ArticleService().process(html);
		String title = (String) article.get("title");
		List<String> pictureUrls = getPics_byHtml(html, artUrl);

		for (String picurl : pictureUrls) {
			Runnable downloadTask = () -> {
				try {
					// Mark picture activity so exec() does not see an idle pool.
					picPoolfinish.set(0);
					downPic(picurl, title);
				} catch (Exception e) {
					new RuntimeException("--url:" + picurl, e).printStackTrace();
				} finally {
					picPoolfinish.set(0);
					rpt.nowIndex_alreadyCount.incrementAndGet();
					showRpt();
				}
			};

			try {
				picPoolfinish.set(0);
				Thread.sleep(perPicDelay);
				picPool.execute(downloadTask);
				rpt.sumbit_pic_count.incrementAndGet();
			} catch (InterruptedException e) {
				// Cancellation request: keep the interrupt visible to the
				// pool and stop instead of silently skipping one picture and
				// carrying on.
				Thread.currentThread().interrupt();
				new RuntimeException("--url:" + picurl, e).printStackTrace();
				return;
			} catch (Exception e) {
				new RuntimeException("--url:" + picurl, e).printStackTrace();
			}
		}
	}
	// Pause in milliseconds that processArt() sleeps before submitting each
	// picture download.
	int perPicDelay= 1000;
	/**
	 * Downloads an article page as GBK text, passing 60 as the third argument
	 * of {@code WebpageContent} (presumably a timeout in seconds - confirm in
	 * websitex).
	 *
	 * attilax, 2016-09-27 16:55:34
	 *
	 * @param artUrl URL of the article page
	 * @return the page HTML; the empty string if the fetch timed out and
	 *         {@code ExUtil.throwExV2} returned normally
	 */
	private String getArtHtml(String artUrl) {
		websitex fetcher = new websitex();
		fetcher.refer = "http://www.czvv.com/";
		try {
			return fetcher.WebpageContent(artUrl, "gbk", 60);
		} catch (TimeoutException timedOut) {
			ExUtil.throwExV2(timedOut, "--url:" + artUrl);
			return "";
		}
	}

	/**
	 * abs http://cl.cmcher.com/htm_data/16/1609/2082995.html
	 * 
	 * @author attilax 老哇的爪子
	 * @since p17 e_3_r
	 */
	// private Object getCurPageUrl(int i) {
	// String s="http://www.czvv.com/k"+ Base64. encode(kw,"utf-8")
	// +"p@pagec0cc0s0m0e0f0d0.html".replaceAll("@page", String.valueOf(i-1));
	// return s;
	// }

	/**
	 * attilax 2016年9月27日 下午4:38:55
	 * 
	 * @param artUrl
	 * @return
	 */

	/**
	 * Downloads one picture to
	 * {@code picSaveDir\<encoded title>\<encoded file name>}.
	 *
	 * <p>The file name is taken from the URL; unless its extension is jpg,
	 * jpeg or png (case-insensitive), {@code ".jpg"} is appended. A previously
	 * stored file is first checked by {@code delDownHalfFile}; if a file still
	 * exists afterwards the picture is treated as already downloaded. URLs
	 * ending in {@code .gif} are skipped. After every download attempt the
	 * stored file is checked again, whether or not the attempt succeeded.
	 *
	 * attilax, 2016-09-27 16:34:55
	 *
	 * @param picurl absolute URL of the picture
	 * @param title  article title, used as the sub-directory name
	 */
	public void downPic(String picurl, String title) {
		System.out.println("--start down pic,title : " + title + ",,," + picurl);

		String name2 = filex.getFileName(picurl);
		String normalizedExt = filex.getExtName(picurl).trim().toLowerCase();
		boolean knownImageExt = normalizedExt.equals("jpg")
				|| normalizedExt.equals("jpeg")
				|| normalizedExt.equals("png");
		if (!knownImageExt) {
			// Covers a missing, over-long or unrecognised extension alike.
			name2 = name2 + ".jpg";
		}

		String subDir = filex.fileNameEncode(title);
		name2 = filex.fileNameEncode(name2);
		String savepath = this.picSaveDir + "\\" + subDir + "\\" + name2;

		delDownHalfFile(savepath);
		boolean alreadyDownloaded = new File(savepath).exists();
		if (alreadyDownloaded || picurl.trim().toLowerCase().endsWith(".gif")) {
			return;
		}

		try {
			new HttpDownloader().down(picurl, savepath, 90);
		} catch (Exception e) {
			ExUtil.throwExV2(e, "--url:" + picurl);
		} finally {
			// The post-download check must never mask the download outcome.
			try {
				delDownHalfFile(savepath);
			} catch (Exception checkFailure) {
				checkFailure.printStackTrace();
			}
		}
	}

	/**
	 * Checks a stored picture and moves it out of the save directory when
	 * {@code imgx.GrayLinePercent} reports more than 10 for it (presumably
	 * the percentage of gray lines left by a truncated download - confirm in
	 * imgx). Does nothing if the file does not exist.
	 *
	 * @param savepath full path of the stored picture
	 */
	private void delDownHalfFile(String savepath) {
		if (!new File(savepath).exists()) {
			return;
		}
		int grayPercent = new imgx().GrayLinePercent(savepath);
		if (grayPercent <= 10) {
			return;
		}
		// The file is moved to the quarantine directory rather than deleted.
		filex.move(savepath, "c:\\0picSaveDir_downEx", this.picSaveDir);
		System.out.println("--del file:" + savepath);
	}

	/**
	 * Downloads an article page and returns the picture URLs found in it.
	 *
	 * @author attilax
	 * @param artUrl URL of the article page
	 * @return picture URLs as produced by {@link #getPics_byHtml(String, String)}
	 * @since p17 d_58_42
	 */
	private List getPics_byArtUrl(String artUrl) {
		return getPics_byHtml(getArtHtml(artUrl), artUrl);
	}

	/**
	 * Builds a picture URL by appending {@code "/"} and the given picture
	 * path to {@code UrlX.getPath(artUrl)}.
	 *
	 * NOTE(review): the path is appended verbatim, so a leading {@code "./"}
	 * or {@code "/"} in {@code pic} is kept as-is - confirm that is intended.
	 *
	 * attilax, 2016-09-27 16:04:56
	 *
	 * @param artUrl URL of the article page the picture was found on
	 * @param pic    picture path as written in the page
	 * @return the concatenated URL
	 */
	public String getAbsUrlPic(String artUrl, String pic) {
		StringBuilder absolute = new StringBuilder();
		absolute.append(UrlX.getPath(artUrl));
		absolute.append("/");
		absolute.append(pic);
		return absolute.toString();
	}

	/**
	 * Collects the picture URLs of an article page from the {@code src}
	 * attribute of every {@code <input>} element and then of every
	 * {@code <img>} element. An element that cannot be processed is logged
	 * and skipped.
	 *
	 * attilax, 2016-09-27 15:07:36
	 *
	 * @param html   HTML of the article page
	 * @param artUrl URL of the article page, used to resolve relative paths
	 * @return the collected picture URLs, in document order per tag
	 */
	public List<String> getPics_byHtml(String html, String artUrl) {
		List<String> found = Lists.newArrayList();
		Document doc = Jsoup.parse(html);

		String[] tagNames = { "input", "img" };
		for (String tagName : tagNames) {
			for (Element candidate : doc.getElementsByTag(tagName)) {
				try {
					addPic2li(candidate, found, artUrl);
				} catch (Exception e) {
					e.printStackTrace();
				}
			}
		}
		return found;
	}

	/**
	 * Adds the picture referenced by an element's {@code src} attribute to
	 * the list, unless it is too short to be a real URL (fewer than 10
	 * characters), ends in {@code .gif}, or is already in the list.
	 *
	 * <p>A value starting with {@code http} is used as an absolute URL after
	 * {@code clrPicUrl} clean-up. Anything else is treated as a path relative
	 * to the article: {@code "-br-"} markers are stripped when the value ends
	 * with one, then the path is resolved with {@code getAbsUrlPic}.
	 *
	 * attilax, 2016-09-28 00:19:01
	 *
	 * @param element element whose {@code src} attribute is inspected
	 * @param li      result list, modified in place
	 * @param artUrl  URL of the article page the element comes from
	 */
	private void addPic2li(Element element, List<String> li, String artUrl) {
		// Trim once so that the stored URL, the clean-up and the duplicate
		// check all see the same value.
		String pic = element.attr("src").trim();
		if (pic.length() < 10) {
			return;
		}

		if (pic.startsWith("http")) {
			pic = clrPicUrl(pic);
		} else {
			if (pic.endsWith("-br-")) {
				pic = pic.replace("-br-", "").trim();
			}
			pic = getAbsUrlPic(artUrl, pic);
		}

		if (pic.trim().toLowerCase().endsWith(".gif")) {
			return;
		}
		// Single guarded add for both branches; the relative branch used to
		// add the URL a second time unconditionally, producing duplicates.
		if (!li.contains(pic)) {
			li.add(pic);
		}
	}

	/**
	 * Cleans a picture URL that ends with a literal {@code <br>}: every
	 * {@code <br>} in it is removed and the result is trimmed. Any other
	 * value is returned unchanged.
	 *
	 * attilax, 2016-09-27 16:33:34
	 *
	 * @param pic picture URL as read from the page
	 * @return the cleaned URL
	 */
	private String clrPicUrl(String pic) {
		if (!pic.endsWith("<br>")) {
			return pic;
		}
		String withoutBreaks = pic.replace("<br>", "");
		return withoutBreaks.trim();
	}

	/**
	 * Extracts the article URLs from a list page: every {@code <a>} whose
	 * {@code href} contains {@code "htm_data"}, without duplicates and in
	 * document order.
	 *
	 * <p>An {@code href} that is already absolute ({@code http://} or
	 * {@code https://}) is used as-is; any other value is appended to
	 * {@code http://cl.cmcher.com/}.
	 *
	 * @author attilax
	 * @param html HTML of the list page
	 * @return the article URLs; empty if the page has no matching link
	 * @throws RuntimeException with message {@code "noRzt"} and the original
	 *         failure as cause if the page cannot be parsed
	 * @since p17 d_57_m
	 */
	public List<String> getArtListByPagehtml(String html) {
		List<String> li = Lists.newArrayList();

		try {
			Document doc = Jsoup.parse(html);
			for (Element element : doc.getElementsByTag("a")) {
				String href = element.attr("href");
				if (!href.contains("htm_data")) {
					continue;
				}
				// Prefixing an already absolute link would yield
				// "http://cl.cmcher.com/http://...".
				boolean absolute = href.startsWith("http://")
						|| href.startsWith("https://");
				String artUrl = absolute ? href : "http://cl.cmcher.com/" + href;
				if (!li.contains(artUrl)) {
					li.add(artUrl);
				}
			}
		} catch (Exception e) {
			// Keep the original failure as the cause instead of dropping it.
			throw new RuntimeException("noRzt", e);
		}

		return li;
	}

	/**
	 * Builds the list-page URLs for every page number from {@code startPage}
	 * to {@code endPage}, both inclusive, in ascending order.
	 *
	 * @author attilax
	 * @return the list-page URLs; empty if {@code startPage > endPage}
	 * @since p17 d_55_h
	 */
	public List<String> getpageUrls() {
		final String template = "http://cl.cmcher.com/thread0806.php?fid=16&search=&page=$p$";
		List<String> urls = Lists.newArrayList();
		int page = startPage;
		while (page <= endPage) {
			urls.add(template.replace("$p$", String.valueOf(page)));
			page++;
		}
		return urls;
	}

}
